[Proposed] 실험 돌리기

Author

김보람

Published

April 11, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run ../function_proposed_gcn.py
# Load the pickled `fraudTrain` object that every experiment below receives as
# its first argument (presumably a pandas DataFrame — confirm against the
# code that produced the pickle).
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this against a file from a trusted source.
with open('../fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    

try_1: 비율 다 다르게: test_frate 조정


# try_1 sweep, coarse grid: the second positional argument goes 0.7 -> 0.1.
# Judging by the result table printed for this cell, the swept value is
# reported in the `throw_rate` column, 0.005 is the target `test_frate`,
# 1e7 is `theta` and 0.8 is `gamma` — confirm against function_proposed_gcn.py.
#
# The first call starts a fresh results table; each later call receives the
# table so far through `prev_results` and returns it with one more row.
df_results = try_1(fraudTrain, 0.7, 0.005, 1e7, 0.8)
for swept_rate in (0.6, 0.5, 0.4, 0.3, 0.2, 0.1):
    df_results = try_1(
        fraudTrain, swept_rate, 0.005, 1e7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.037296 0.004819 1.000000 0.009592 0.653981 True Proposed 0.7 6006 amt 0.998002 2574 0.004662 None 10000000.0 0.8
1 GCN None 0.869797 0.036946 1.000000 0.071259 0.972233 True Proposed 0.6 7007 amt 0.855002 3003 0.004995 None 10000000.0 0.8
2 GCN None 0.890647 0.043689 1.000000 0.083721 0.972834 True Proposed 0.5 8409 amt 0.712094 3603 0.004996 None 10000000.0 0.8
3 GCN None 0.916963 0.051020 0.909091 0.096618 0.968353 True Proposed 0.4 10511 amt 0.569308 4504 0.004885 None 10000000.0 0.8
4 GCN None 0.923410 0.055785 0.900000 0.105058 0.968485 True Proposed 0.3 14014 amt 0.426431 6006 0.004995 None 10000000.0 0.8
5 GCN None 0.939727 0.067708 0.866667 0.125604 0.968561 True Proposed 0.2 21021 amt 0.283574 9009 0.004995 None 10000000.0 0.8
6 GCN None 0.969863 0.107452 0.688889 0.185907 0.973047 True Proposed 0.1 42042 amt 0.140716 18018 0.004995 None 10000000.0 0.8
# try_1 sweep, fine grid: the second positional argument goes 0.09 -> 0.01
# while the remaining arguments (0.005, 1e7, 0.8) stay fixed.
# NOTE(review): the swept value presumably ends up in the `throw_rate` column
# of the results — confirm against function_proposed_gcn.py.
#
# The first call deliberately omits `prev_results`, so this sweep starts a new
# table instead of extending the one from the previous cell.
df_results = try_1(fraudTrain, 0.09, 0.005, 1e7, 0.8)
for swept_rate in (0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01):
    df_results = try_1(
        fraudTrain, swept_rate, 0.005, 1e7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results

try_5: 비율 다 다름: tr은 df50

# try_5 sweep: the second positional argument goes 10 -> 5 with 1e+7 and 0.8
# held fixed (reported as `theta` and `gamma` in the result table).
# NOTE(review): the meaning of the swept integer is not visible in this file;
# in the printed results a smaller value coincides with a larger `test_size`
# — confirm against function_proposed_gcn.py.
#
# The first call starts a fresh results table; each later call passes the
# table so far through `prev_results`.
df_results = try_5(fraudTrain, 10, 1e+7, 0.8)
for swept_value in (9, 8, 7, 6, 5):
    df_results = try_5(
        fraudTrain, swept_value, 1e+7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.927785 0.017810 0.971429 0.034979 0.980290 True Proposed 0.129162 9009 amt 0.497724 25978 0.001347 None 10000000.0 0.8
1 GCN None 0.928067 0.019376 0.953488 0.037981 0.977006 True Proposed 0.120661 9009 amt 0.502609 28874 0.001489 None 10000000.0 0.8
2 GCN None 0.920646 0.012639 0.970588 0.024953 0.971934 True Proposed 0.109181 9009 amt 0.499278 32500 0.001046 None 10000000.0 0.8
3 GCN None 0.925283 0.016329 0.938776 0.032100 0.973764 True Proposed 0.098058 9009 amt 0.496725 37127 0.001320 None 10000000.0 0.8
4 GCN None 0.922988 0.017389 0.967213 0.034163 0.977288 True Proposed 0.087737 9009 amt 0.502831 43318 0.001408 None 10000000.0 0.8
5 GCN None 0.918906 0.015439 0.942857 0.030380 0.974102 True Proposed 0.075671 9009 amt 0.504274 51952 0.001347 None 10000000.0 0.8

try_6: 비율 다 같게

# try_6: two runs that differ only in the second positional argument
# (0.2, then 0.3). The trailing arguments are identical for both runs, so they
# are kept in one tuple.
# NOTE(review): 1e7 / 0.8 are presumably theta / gamma as in the other
# experiments — confirm against function_proposed_gcn.py.
fixed_tail = (1e7, 0.8)

# The second run receives the first run's table through `prev_results`.
df_results = try_6(fraudTrain, 0.2, *fixed_tail)
df_results = try_6(fraudTrain, 0.3, *fixed_tail, prev_results=df_results)

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results

try_7: 비율 다 다름(train_frate 설정)

# try_7 sweep, coarse grid: the second positional argument goes 0.9 -> 0.1.
# In the result table printed for this cell the swept value matches the
# `train_frate` column (e.g. 0.9 -> 0.900080); 1e+7 and 0.8 are reported as
# `theta` and `gamma`. The meaning of the fixed `10` is not visible here.
# NOTE(review): 0.5 is absent from the grid — possibly because the try_5
# experiment already trains at a ~0.5 fraud rate; confirm this is intended.
#
# The first call starts a fresh results table; each later call passes the
# table so far through `prev_results`.
df_results = try_7(fraudTrain, 0.9, 10, 1e+7, 0.8)
for train_rate in (0.8, 0.7, 0.6, 0.4, 0.3, 0.2, 0.1):
    df_results = try_7(
        fraudTrain, train_rate, 10, 1e+7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.808770 0.007758 0.975000 0.015394 0.958392 True Proposed 0.146142 5004 amt 0.900080 26089 0.001533 None 10000000.0 0.8
1 GCN None 0.876793 0.011087 0.972973 0.021924 0.972643 True Proposed 0.143218 5630 amt 0.799822 26070 0.001419 None 10000000.0 0.8
2 GCN None 0.905148 0.015544 0.975000 0.030600 0.952527 True Proposed 0.140461 6435 amt 0.702875 26051 0.001535 None 10000000.0 0.8
3 GCN None 0.907249 0.018308 1.000000 0.035957 0.968529 True Proposed 0.135638 7507 amt 0.599707 26016 0.001730 None 10000000.0 0.8
4 GCN None 0.933698 0.018296 0.941176 0.035895 0.970539 True Proposed 0.122217 11261 amt 0.400586 25927 0.001311 None 10000000.0 0.8
5 GCN None 0.942821 0.021898 0.916667 0.042774 0.955167 True Proposed 0.110806 15015 amt 0.299034 25831 0.001394 None 10000000.0 0.8
6 GCN None 0.955435 0.027257 0.969697 0.053024 0.981679 True Proposed 0.094042 22522 amt 0.199671 25648 0.001287 None 10000000.0 0.8
7 GCN None 0.981613 0.032051 0.652174 0.061100 0.967592 True Proposed 0.064221 45045 amt 0.099456 25072 0.000917 None 10000000.0 0.8
# try_7 sweep, fine grid: the second positional argument goes 0.09 -> 0.01
# with the remaining arguments (10, 1e+7, 0.8) held fixed.
# NOTE(review): the swept value is presumably the target train fraud rate —
# confirm against function_proposed_gcn.py. 0.05 is absent from the grid;
# confirm this is intended.
#
# The first call deliberately omits `prev_results`, so this sweep starts a new
# table instead of extending the one from the previous cell.
df_results = try_7(fraudTrain, 0.09, 10, 1e+7, 0.8)
for train_rate in (0.08, 0.07, 0.06, 0.04, 0.03, 0.02, 0.01):
    df_results = try_7(
        fraudTrain, train_rate, 10, 1e+7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results

커널 죽음

# try_7 sweep, finest grid: the second positional argument goes
# 0.009 -> 0.001 with the remaining arguments (10, 1e+7, 0.8) held fixed.
# NOTE(review): the notebook records "커널 죽음" (kernel died) just before this
# cell; it is unclear whether that refers to this sweep. Results are written
# only after every call has finished, so a crash mid-sweep loses all rows.
# NOTE(review): 0.005 is absent from the grid — confirm this is intended.
#
# The first call deliberately omits `prev_results`, so this sweep starts a new
# table instead of extending the one from the previous cell.
df_results = try_7(fraudTrain, 0.009, 10, 1e+7, 0.8)
for train_rate in (0.008, 0.007, 0.006, 0.004, 0.003, 0.002, 0.001):
    df_results = try_7(
        fraudTrain, train_rate, 10, 1e+7, 0.8,
        prev_results=df_results,
    )

# Timestamp the output file so that repeated runs do not overwrite each other.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

# Display the accumulated table as the cell output.
df_results